Take-home Exercise 1

A short description of the post.

Genice Goh true
09-10-2021

1.0 Introduction

This take-home exercise aims to reveal the spatial-temporal patterns of monthly cumulative confirmed COVID-19 rates and death rates in Jakarta DKI at the sub-district level.

2.0 The Data

For this assignment, the following data is used:

3.0 Instaling and Loading Packages

In this take-home exercise, the packages sf, tidyverse, tmap and readxl are used.

packages = c('sf', 'tidyverse', 'tmap', 'readxl')
for (p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p,character.only = T)
}

4.0 Data Preparation and Wrangling

4.1 Geospatial Data

The geospatial data is imported with the use of st_read().

DKI <- st_read(dsn="data/geospatial",
               layer="BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA")
Reading layer `BATAS_DESA_DESEMBER_2019_DUKCAPIL_DKI_JAKARTA' from data source `C:\Users\user\Desktop\SMU\Y4S1\IS415\geniceee\IS415_blog\_posts\2021-09-10-take-home-exercise-1\data\geospatial' 
  using driver `ESRI Shapefile'
Simple feature collection with 269 features and 161 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 106.3831 ymin: -6.370815 xmax: 106.9728 ymax: -5.184322
Geodetic CRS:  WGS 84
glimpse(DKI)
Rows: 269
Columns: 162
$ OBJECT_ID  <dbl> 25477, 25478, 25397, 25400, 25378, 25379, 25390, ~
$ KODE_DESA  <chr> "3173031006", "3173031007", "3171031003", "317103~
$ DESA       <chr> "KEAGUNGAN", "GLODOK", "HARAPAN MULIA", "CEMPAKA ~
$ KODE       <dbl> 317303, 317303, 317103, 317103, 310101, 310101, 3~
$ PROVINSI   <chr> "DKI JAKARTA", "DKI JAKARTA", "DKI JAKARTA", "DKI~
$ KAB_KOTA   <chr> "JAKARTA BARAT", "JAKARTA BARAT", "JAKARTA PUSAT"~
$ KECAMATAN  <chr> "TAMAN SARI", "TAMAN SARI", "KEMAYORAN", "KEMAYOR~
$ DESA_KELUR <chr> "KEAGUNGAN", "GLODOK", "HARAPAN MULIA", "CEMPAKA ~
$ JUMLAH_PEN <dbl> 21609, 9069, 29085, 41913, 6947, 7059, 15793, 589~
$ JUMLAH_KK  <dbl> 7255, 3273, 9217, 13766, 2026, 2056, 5599, 1658, ~
$ LUAS_WILAY <dbl> 0.36, 0.37, 0.53, 0.97, 0.93, 0.95, 1.76, 1.14, 0~
$ KEPADATAN  <dbl> 60504, 24527, 54465, 42993, 7497, 7401, 8971, 515~
$ PERPINDAHA <dbl> 102, 25, 131, 170, 17, 26, 58, 13, 113, 178, 13, ~
$ JUMLAH_MEN <dbl> 68, 52, 104, 151, 14, 32, 36, 10, 60, 92, 5, 83, ~
$ PERUBAHAN  <dbl> 20464, 8724, 27497, 38323, 6853, 6993, 15006, 580~
$ WAJIB_KTP  <dbl> 16027, 7375, 20926, 30264, 4775, 4812, 12559, 398~
$ SILAM      <dbl> 15735, 1842, 26328, 36813, 6941, 7057, 7401, 5891~
$ KRISTEN    <dbl> 2042, 2041, 1710, 3392, 6, 0, 3696, 0, 4058, 5130~
$ KHATOLIK   <dbl> 927, 1460, 531, 1082, 0, 0, 1602, 0, 2100, 2575, ~
$ HINDU      <dbl> 15, 9, 42, 127, 0, 0, 622, 0, 25, 27, 0, 9, 115, ~
$ BUDHA      <dbl> 2888, 3716, 469, 495, 0, 2, 2462, 0, 4134, 4740, ~
$ KONGHUCU   <dbl> 2, 1, 5, 1, 0, 0, 10, 0, 9, 10, 0, 4, 1, 1, 4, 0,~
$ KEPERCAYAA <dbl> 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 2, 0, 22, 0, 3, ~
$ PRIA       <dbl> 11049, 4404, 14696, 21063, 3547, 3551, 7833, 2954~
$ WANITA     <dbl> 10560, 4665, 14389, 20850, 3400, 3508, 7960, 2937~
$ BELUM_KAWI <dbl> 10193, 4240, 14022, 20336, 3366, 3334, 7578, 2836~
$ KAWIN      <dbl> 10652, 4364, 13450, 19487, 3224, 3404, 7321, 2791~
$ CERAI_HIDU <dbl> 255, 136, 430, 523, 101, 80, 217, 44, 381, 476, 3~
$ CERAI_MATI <dbl> 509, 329, 1183, 1567, 256, 241, 677, 220, 1197, 9~
$ U0         <dbl> 1572, 438, 2232, 3092, 640, 648, 802, 585, 2220, ~
$ U5         <dbl> 1751, 545, 2515, 3657, 645, 684, 995, 588, 2687, ~
$ U10        <dbl> 1703, 524, 2461, 3501, 620, 630, 1016, 513, 2653,~
$ U15        <dbl> 1493, 521, 2318, 3486, 669, 671, 1106, 548, 2549,~
$ U20        <dbl> 1542, 543, 2113, 3098, 619, 609, 1081, 491, 2313,~
$ U25        <dbl> 1665, 628, 2170, 3024, 639, 582, 1002, 523, 2446,~
$ U30        <dbl> 1819, 691, 2363, 3188, 564, 592, 1236, 478, 2735,~
$ U35        <dbl> 1932, 782, 2595, 3662, 590, 572, 1422, 504, 3034,~
$ U40        <dbl> 1828, 675, 2371, 3507, 480, 486, 1200, 397, 2689,~
$ U45        <dbl> 1600, 607, 2250, 3391, 421, 457, 1163, 365, 2470,~
$ U50        <dbl> 1408, 619, 1779, 2696, 346, 369, 1099, 288, 2129,~
$ U55        <dbl> 1146, 602, 1379, 1909, 252, 318, 979, 235, 1843, ~
$ U60        <dbl> 836, 614, 1054, 1397, 197, 211, 880, 162, 1386, 1~
$ U65        <dbl> 587, 555, 654, 970, 122, 114, 747, 111, 958, 932,~
$ U70        <dbl> 312, 311, 411, 631, 69, 55, 488, 65, 554, 573, 38~
$ U75        <dbl> 415, 414, 420, 704, 74, 61, 577, 38, 717, 642, 37~
$ TIDAK_BELU <dbl> 3426, 1200, 4935, 7328, 1306, 1318, 2121, 973, 50~
$ BELUM_TAMA <dbl> 1964, 481, 2610, 3763, 730, 676, 1278, 732, 3241,~
$ TAMAT_SD   <dbl> 2265, 655, 2346, 2950, 1518, 2054, 1169, 1266, 44~
$ SLTP       <dbl> 3660, 1414, 3167, 5138, 906, 1357, 2236, 852, 585~
$ SLTA       <dbl> 8463, 3734, 12172, 16320, 2040, 1380, 5993, 1570,~
$ DIPLOMA_I  <dbl> 81, 23, 84, 179, 22, 15, 43, 36, 85, 83, 4, 63, 2~
$ DIPLOMA_II <dbl> 428, 273, 1121, 1718, 101, 59, 573, 97, 604, 740,~
$ DIPLOMA_IV <dbl> 1244, 1241, 2477, 4181, 314, 191, 2199, 357, 1582~
$ STRATA_II  <dbl> 74, 46, 166, 315, 10, 8, 168, 8, 63, 92, 5, 174, ~
$ STRATA_III <dbl> 4, 2, 7, 21, 0, 1, 13, 0, 3, 9, 0, 16, 8, 7, 75, ~
$ BELUM_TIDA <dbl> 3927, 1388, 5335, 8105, 1788, 1627, 2676, 1129, 5~
$ APARATUR_P <dbl> 81, 10, 513, 931, 246, 75, 156, 160, 132, 79, 23,~
$ TENAGA_PEN <dbl> 70, 43, 288, 402, 130, 93, 81, 123, 123, 73, 45, ~
$ WIRASWASTA <dbl> 8974, 3832, 10662, 14925, 788, 728, 6145, 819, 12~
$ PERTANIAN  <dbl> 1, 0, 1, 3, 2, 2, 1, 3, 2, 5, 1, 1, 0, 0, 2, 5, 2~
$ NELAYAN    <dbl> 0, 0, 2, 0, 960, 1126, 1, 761, 1, 2, 673, 0, 0, 0~
$ AGAMA_DAN  <dbl> 6, 6, 5, 40, 0, 0, 49, 2, 10, 11, 0, 54, 15, 16, ~
$ PELAJAR_MA <dbl> 4018, 1701, 6214, 9068, 1342, 1576, 3135, 1501, 6~
$ TENAGA_KES <dbl> 28, 29, 80, 142, 34, 26, 60, 11, 48, 55, 16, 68, ~
$ PENSIUNAN  <dbl> 57, 50, 276, 498, 20, 7, 59, 14, 56, 75, 2, 97, 5~
$ LAINNYA    <dbl> 4447, 2010, 5709, 7799, 1637, 1799, 3430, 1368, 7~
$ GENERATED  <chr> "30 Juni 2019", "30 Juni 2019", "30 Juni 2019", "~
$ KODE_DES_1 <chr> "3173031006", "3173031007", "3171031003", "317103~
$ BELUM_     <dbl> 3099, 1032, 4830, 7355, 1663, 1704, 2390, 1213, 5~
$ MENGUR_    <dbl> 4447, 2026, 5692, 7692, 1576, 1731, 3500, 1323, 7~
$ PELAJAR_   <dbl> 3254, 1506, 6429, 8957, 1476, 1469, 3185, 1223, 6~
$ PENSIUNA_1 <dbl> 80, 65, 322, 603, 24, 8, 70, 20, 75, 97, 2, 132, ~
$ PEGAWAI_   <dbl> 48, 5, 366, 612, 223, 72, 65, 143, 73, 48, 15, 89~
$ TENTARA    <dbl> 4, 0, 41, 57, 3, 0, 74, 1, 20, 12, 2, 11, 90, 340~
$ KEPOLISIAN <dbl> 10, 1, 16, 42, 11, 8, 2, 9, 17, 7, 3, 9, 165, 15,~
$ PERDAG_    <dbl> 31, 5, 1, 3, 6, 1, 2, 4, 3, 1, 4, 0, 1, 2, 9, 2, ~
$ PETANI     <dbl> 0, 0, 1, 2, 0, 1, 1, 0, 1, 1, 1, 2, 0, 0, 1, 2, 0~
$ PETERN_    <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ NELAYAN_1  <dbl> 1, 0, 1, 0, 914, 1071, 0, 794, 0, 1, 663, 0, 0, 0~
$ INDUSTR_   <dbl> 7, 3, 4, 3, 1, 3, 0, 0, 1, 7, 0, 0, 2, 2, 1, 3, 1~
$ KONSTR_    <dbl> 3, 0, 2, 6, 3, 8, 1, 6, 1, 5, 10, 0, 2, 5, 7, 4, ~
$ TRANSP_    <dbl> 2, 0, 7, 4, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 6, 3, 2~
$ KARYAW_    <dbl> 6735, 3034, 7347, 10185, 237, 264, 4319, 184, 940~
$ KARYAW1    <dbl> 9, 2, 74, 231, 4, 0, 16, 1, 13, 10, 1, 24, 17, 29~
$ KARYAW1_1  <dbl> 0, 0, 5, 15, 0, 0, 0, 1, 0, 1, 0, 0, 2, 4, 7, 9, ~
$ KARYAW1_12 <dbl> 23, 4, 25, 35, 141, 50, 16, 157, 6, 9, 40, 11, 11~
$ BURUH      <dbl> 515, 155, 971, 636, 63, 218, 265, 55, 1085, 652, ~
$ BURUH_     <dbl> 1, 0, 0, 0, 2, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 2, 1~
$ BURUH1     <dbl> 0, 0, 1, 0, 1, 25, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, ~
$ BURUH1_1   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PEMBANT_   <dbl> 1, 1, 4, 1, 1, 0, 7, 0, 5, 1, 0, 6, 1, 10, 11, 9,~
$ TUKANG     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0~
$ TUKANG_1   <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ TUKANG_12  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ TUKANG__13 <dbl> 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0~
$ TUKANG__14 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ TUKANG__15 <dbl> 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0~
$ TUKANG__16 <dbl> 7, 4, 10, 14, 0, 0, 2, 0, 7, 8, 0, 8, 1, 0, 3, 2,~
$ TUKANG__17 <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0~
$ PENATA     <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0~
$ PENATA_    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PENATA1_1  <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 0, 0~
$ MEKANIK    <dbl> 11, 1, 10, 8, 0, 0, 4, 0, 7, 8, 0, 9, 0, 15, 10, ~
$ SENIMAN_   <dbl> 4, 0, 12, 28, 0, 0, 2, 0, 3, 4, 0, 9, 6, 7, 14, 1~
$ TABIB      <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0~
$ PARAJI_    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PERANCA_   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 1, 0, 1~
$ PENTER_    <dbl> 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 2~
$ IMAM_M     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PENDETA    <dbl> 2, 4, 5, 33, 0, 0, 20, 0, 10, 8, 0, 30, 14, 14, 1~
$ PASTOR     <dbl> 0, 1, 0, 1, 0, 0, 8, 0, 0, 0, 0, 23, 0, 0, 0, 0, ~
$ WARTAWAN   <dbl> 7, 1, 16, 27, 0, 0, 4, 0, 8, 6, 0, 9, 5, 9, 26, 3~
$ USTADZ     <dbl> 6, 1, 1, 5, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0~
$ JURU_M     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0~
$ PROMOT     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ ANGGOTA_   <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 2, 1, 2~
$ ANGGOTA1   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0~
$ ANGGOTA1_1 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PRESIDEN   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ WAKIL_PRES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1~
$ ANGGOTA1_2 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0~
$ ANGGOTA1_3 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0~
$ DUTA_B     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1~
$ GUBERNUR   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1~
$ WAKIL_GUBE <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ BUPATI     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ WAKIL_BUPA <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ WALIKOTA   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ WAKIL_WALI <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ ANGGOTA1_4 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1~
$ ANGGOTA1_5 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ DOSEN      <dbl> 3, 2, 23, 36, 1, 2, 11, 0, 3, 5, 0, 14, 6, 28, 69~
$ GURU       <dbl> 72, 40, 272, 378, 118, 72, 69, 116, 126, 71, 36, ~
$ PILOT      <dbl> 1, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 1~
$ PENGACARA_ <dbl> 4, 1, 8, 22, 0, 0, 5, 0, 5, 4, 0, 4, 3, 12, 24, 2~
$ NOTARIS    <dbl> 0, 0, 3, 5, 0, 0, 4, 0, 0, 0, 0, 5, 0, 5, 10, 3, ~
$ ARSITEK    <dbl> 1, 0, 2, 3, 0, 0, 2, 0, 0, 0, 0, 4, 1, 2, 7, 3, 9~
$ AKUNTA_    <dbl> 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 2~
$ KONSUL_    <dbl> 1, 0, 2, 11, 0, 0, 4, 0, 0, 0, 0, 6, 2, 3, 10, 8,~
$ DOKTER     <dbl> 16, 32, 35, 68, 0, 1, 63, 0, 27, 32, 1, 63, 48, 6~
$ BIDAN      <dbl> 3, 1, 9, 18, 12, 8, 1, 3, 3, 3, 7, 3, 10, 10, 7, ~
$ PERAWAT    <dbl> 7, 0, 25, 44, 12, 10, 3, 6, 12, 20, 6, 7, 26, 16,~
$ APOTEK_    <dbl> 0, 0, 2, 3, 1, 0, 0, 0, 1, 2, 0, 1, 2, 3, 3, 3, 3~
$ PSIKIATER  <dbl> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 1~
$ PENYIA_    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0~
$ PENYIA1    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PELAUT     <dbl> 0, 0, 6, 16, 1, 1, 0, 14, 2, 4, 1, 2, 4, 2, 10, 1~
$ PENELITI   <dbl> 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1~
$ SOPIR      <dbl> 65, 3, 94, 123, 0, 1, 61, 0, 76, 79, 0, 63, 44, 1~
$ PIALAN     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ PARANORMAL <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0~
$ PEDAGA_    <dbl> 379, 126, 321, 562, 11, 10, 412, 15, 202, 225, 0,~
$ PERANG_    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ KEPALA_    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0~
$ BIARAW_    <dbl> 0, 1, 0, 0, 0, 0, 22, 0, 3, 0, 0, 2, 1, 0, 4, 0, ~
$ WIRASWAST_ <dbl> 1370, 611, 1723, 3099, 131, 119, 1128, 259, 2321,~
$ LAINNYA_12 <dbl> 94, 57, 82, 122, 12, 10, 41, 6, 89, 158, 24, 37, ~
$ LUAS_DESA  <dbl> 25476, 25477, 25396, 25399, 25377, 25378, 25389, ~
$ KODE_DES_3 <chr> "3173031006", "3173031007", "3171031003", "317103~
$ DESA_KEL_1 <chr> "KEAGUNGAN", "GLODOK", "HARAPAN MULIA", "CEMPAKA ~
$ KODE_12    <dbl> 317303, 317303, 317103, 317103, 310101, 310101, 3~
$ geometry   <MULTIPOLYGON [°]> MULTIPOLYGON (((106.8164 -6..., MULT~

4.1.1 Checking for NA values

Since the value TRUE is returned, it implies that the dataset contains NA values.

sum(is.na(DKI)) > 0
[1] TRUE

The function drop_na is used to drop the NA values in the dataset.

DKI <- drop_na(DKI)

4.1.2 CRS Check and Reprojection

The st_crs() function is used to check the coordinate reference system of the dataset.

st_crs(DKI)
Coordinate Reference System:
  User input: WGS 84 
  wkt:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["latitude",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["longitude",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]

Since the dataset is in WGS84 projection system, there is a need to transform it to the national projected coordinates system of Indonesia (DGN95/Indonesia TM-3 zone 54.1), which has an EPSG code of 23845.

DKI23845 <- st_transform(DKI, crs = 23845)
st_crs(DKI23845)
Coordinate Reference System:
  User input: EPSG:23845 
  wkt:
PROJCRS["DGN95 / Indonesia TM-3 zone 54.1",
    BASEGEOGCRS["DGN95",
        DATUM["Datum Geodesi Nasional 1995",
            ELLIPSOID["WGS 84",6378137,298.257223563,
                LENGTHUNIT["metre",1]]],
        PRIMEM["Greenwich",0,
            ANGLEUNIT["degree",0.0174532925199433]],
        ID["EPSG",4755]],
    CONVERSION["Indonesia TM-3 zone 54.1",
        METHOD["Transverse Mercator",
            ID["EPSG",9807]],
        PARAMETER["Latitude of natural origin",0,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8801]],
        PARAMETER["Longitude of natural origin",139.5,
            ANGLEUNIT["degree",0.0174532925199433],
            ID["EPSG",8802]],
        PARAMETER["Scale factor at natural origin",0.9999,
            SCALEUNIT["unity",1],
            ID["EPSG",8805]],
        PARAMETER["False easting",200000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8806]],
        PARAMETER["False northing",1500000,
            LENGTHUNIT["metre",1],
            ID["EPSG",8807]]],
    CS[Cartesian,2],
        AXIS["easting (X)",east,
            ORDER[1],
            LENGTHUNIT["metre",1]],
        AXIS["northing (Y)",north,
            ORDER[2],
            LENGTHUNIT["metre",1]],
    USAGE[
        SCOPE["Cadastre."],
        AREA["Indonesia - onshore east of 138°E."],
        BBOX[-9.19,138,-1.49,141.01]],
    ID["EPSG",23845]]

4.1.3 Exclude all outer islands

The tmap() function is used to plot the geospatial data for easy visualisation.

tmap_mode("view")
tm_shape(DKI23845) + 
  tm_polygons()

While looking at the data points of the outer islands using the interactive map, it can be observed that they share the same value “KEPULAUAN SERIBU” for the field “KAB_KOTA”. This information can therefore be used to exclude the outer islands from the dataframe.

tmap_mode('plot')

DKI23845 <- DKI23845 %>% 
  filter(`KAB_KOTA` != "KEPULAUAN SERIBU")

plot(DKI23845)

4.1.4 Select the first 9 fields

DKI23845 <- DKI23845 %>% select(0:9)

DKI23845
Simple feature collection with 261 features and 9 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -3644275 ymin: 663887.8 xmax: -3606237 ymax: 701380.1
Projected CRS: DGN95 / Indonesia TM-3 zone 54.1
First 10 features:
   OBJECT_ID  KODE_DESA               DESA   KODE    PROVINSI
1      25477 3173031006          KEAGUNGAN 317303 DKI JAKARTA
2      25478 3173031007             GLODOK 317303 DKI JAKARTA
3      25397 3171031003      HARAPAN MULIA 317103 DKI JAKARTA
4      25400 3171031006       CEMPAKA BARU 317103 DKI JAKARTA
5      25390 3171021001         PASAR BARU 317102 DKI JAKARTA
6      25391 3171021002       KARANG ANYAR 317102 DKI JAKARTA
7      25394 3171021005 MANGGA DUA SELATAN 317102 DKI JAKARTA
8      25386 3171011003       PETOJO UTARA 317101 DKI JAKARTA
9      25403 3171041001              SENEN 317104 DKI JAKARTA
10     25408 3171041006             BUNGUR 317104 DKI JAKARTA
        KAB_KOTA   KECAMATAN         DESA_KELUR JUMLAH_PEN
1  JAKARTA BARAT  TAMAN SARI          KEAGUNGAN      21609
2  JAKARTA BARAT  TAMAN SARI             GLODOK       9069
3  JAKARTA PUSAT   KEMAYORAN      HARAPAN MULIA      29085
4  JAKARTA PUSAT   KEMAYORAN       CEMPAKA BARU      41913
5  JAKARTA PUSAT SAWAH BESAR         PASAR BARU      15793
6  JAKARTA PUSAT SAWAH BESAR       KARANG ANYAR      33383
7  JAKARTA PUSAT SAWAH BESAR MANGGA DUA SELATAN      35906
8  JAKARTA PUSAT      GAMBIR       PETOJO UTARA      21828
9  JAKARTA PUSAT       SENEN              SENEN       8643
10 JAKARTA PUSAT       SENEN             BUNGUR      23001
                         geometry
1  MULTIPOLYGON (((-3626874 69...
2  MULTIPOLYGON (((-3627130 69...
3  MULTIPOLYGON (((-3621251 68...
4  MULTIPOLYGON (((-3620608 69...
5  MULTIPOLYGON (((-3624097 69...
6  MULTIPOLYGON (((-3624785 69...
7  MULTIPOLYGON (((-3624752 69...
8  MULTIPOLYGON (((-3626121 69...
9  MULTIPOLYGON (((-3623189 69...
10 MULTIPOLYGON (((-3622451 69...

4.2 Aspatial Data

The aspatial data is imported using the read_excel function. Since data is only required at the sub-district level, data will only be extracted from the “data” sheet.

mar20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 Maret 2020 Pukul 08.00).xlsx", sheet="data")

apr20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (30 April 2020 Pukul 09.00).xlsx", sheet="data") 

may20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 MEI 2020 Pukul 09.00).xlsx", sheet="data") 

jun20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (30 Juni 2020 Pukul 09.00).xlsx", sheet="data") 

jul20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 Juli 2020 Pukul 09.00).xlsx", sheet="data") 

aug20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 Agustus 2020 Pukul 10.00).xlsx", sheet="data") 

sep20 <- read_excel("data/aspatial/Standar_Kelurahan_Data_Corona_30_September_2020_Pukul_10_00.xlsx", sheet="data") 

oct20 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 Oktober 2020 Pukul 10.00).xlsx", sheet="data") 

nov20 <- read_excel("data/aspatial/Standar_Kelurahan_Data_Corona_30_November_2020_Pukul_10_00.xlsx", sheet="data") 

dec20 <- read_excel("data/aspatial/Standar_Kelurahan_Data_Corona_26_Desember_2020_Pukul_10_00.xlsx", sheet="data") 

jan21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (30 Januari 2021 Pukul 10.00).xlsx", sheet="data") 

feb21 <- read_excel("data/aspatial/Standar_Kelurahan_Data_Corona_27_Februari_2021_Pukul_10_00.xlsx", sheet="data")

mar21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (27 Maret 2021 Pukul 10.00).xlsx", sheet="data") 

apr21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (24 April 2021 Pukul 10.00).xlsx", sheet="data")

may21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (29 Mei 2021 Pukul 10.00).xlsx", sheet="data") 

jun21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (26 Juni 2021 Pukul 10.00).xlsx", sheet="data") 

jul21 <- read_excel("data/aspatial/Standar Kelurahan Data Corona (31 Juli 2021 Pukul 10.00).xlsx", sheet="data")

4.2.1 Extracting Required Columns

In the case of duplicated columns, values from the 2nd column are retained for consistency. There is also a need to create a new column called Month, with values following the respective months.

mar20_filtered <- mar20 %>% 
  select("ID_KEL"="ID_KEL...2", "Nama_provinsi", "nama_kota", 
         "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal") %>%
  mutate("Month"= "Mar20")

apr20_filtered <- apr20 %>% 
  select("ID_KEL"="ID_KEL...2", "Nama_provinsi", "nama_kota", 
         "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal") %>%
  mutate("Month"= "Apr20")

may20_filtered <- may20 %>% 
  select("ID_KEL"="ID_KEL...2", "Nama_provinsi", "nama_kota", 
         "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal") %>%
  mutate("Month"= "May20")

jun20_filtered <- jun20 %>% 
  select("ID_KEL"="ID_KEL...2", "Nama_provinsi", "nama_kota", 
         "nama_kecamatan", "nama_kelurahan", "POSITIF", "Meninggal") %>%
  mutate("Month"= "Jun20")

jul20_filtered <- jul20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...26") %>%
  mutate("Month"= "Jul20")

aug20_filtered <- aug20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...28") %>%
  mutate("Month"= "Aug20")

sep20_filtered <- sep20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...29") %>%
  mutate("Month"= "Sep20")

oct20_filtered <- oct20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...30") %>%
  mutate("Month"= "Oct20")

nov20_filtered <- nov20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...30") %>%
  mutate("Month"= "Nov20")

dec20_filtered <- dec20 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...30") %>%
  mutate("Month"= "Dec20")

jan21_filtered <- jan21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Jan21")

feb21_filtered <- feb21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan", 
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Feb21")

mar21_filtered <- mar21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan",
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Mar21")

apr21_filtered <- apr21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan",
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Apr21")

may21_filtered <- may21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan",
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "May21")

jun21_filtered <- jun21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan",
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Jun21")

jul21_filtered <- jul21 %>% 
  select("ID_KEL", "Nama_provinsi", "nama_kota", "nama_kecamatan",
         "nama_kelurahan", "POSITIF", "Meninggal"="Meninggal...31") %>%
  mutate("Month"= "Jul21")

4.2.2 Integrating monthly data

covidDKI <- bind_rows(mar20_filtered, apr20_filtered, may20_filtered,
                      jun20_filtered, jul20_filtered, aug20_filtered,
                      sep20_filtered, oct20_filtered, nov20_filtered,
                      dec20_filtered, jan21_filtered, feb21_filtered,
                      mar21_filtered, apr21_filtered, may21_filtered,
                      jun21_filtered, jul21_filtered)

covidDKI

4.2.3 Checking for NA values

Since the value TRUE is returned, it implies that the dataset contains NA values.

sum(is.na(covidDKI)) > 0
The functopm drop_na is used to drop the NA values in the dataset.
covidDKI <- drop_na(covidDKI)

4.2.4 Data Cleaning

While looking through the cleaned dataset, it can be observed there are rows where the ID_KEL column has non-integer values. These observations are therefore removed.

covidDKI <- covidDKI %>%
  filter(`ID_KEL` != "LUAR DKI JAKARTA") %>% 
  filter(`ID_KEL` != "PROSES UPDATE DATA") %>% 
  filter(`ID_KEL` != "BELUM DIKETAHUI")

Moreover, several data points in the column “nama_kelurahan” has values in both the short and long form. There is therefore a need to convert those values in the short form to the long form to aid later data wrangling.

covidDKI$nama_kelurahan <- gsub("P\\.", "PULAU", covidDKI$nama_kelurahan)

4.2.5 Save the aspatial data as RDS

covidDKI_rds <- write_rds(covidDKI, "data/rds/covidDKI.rds")

The RDS aspatial data is now used to proceed with the later analysis.

covidDKI <- read_rds("data/rds/covidDKI.rds")

4.2.6 Prepare cumulative monthly data in every column

Since it is required later to plot the cumulative confirmed cases rates and death rates by month, the dataset is pivoted to a wide version.

covidDKI <- covidDKI %>%
  group_by(ID_KEL, Nama_provinsi, nama_kota, nama_kecamatan, 
           nama_kelurahan, Month) %>%
  summarise(`MENINGGAL` = sum(`Meninggal`), `POSITIF` = sum(`POSITIF`)) %>%
  ungroup() %>%
  pivot_wider(names_from=Month, values_from=c(POSITIF, MENINGGAL))

4.3 Combine aspatial and geospatial data

The function right_join() is used to combine the aspatial and geospatial data.

jakartaDKI <- right_join(covidDKI, DKI23845,
                         by= c("ID_KEL" = "KODE_DESA"))

4.3.1 Checking for NA values

Since the value returned is FALSE, there are no NA vlues in the dataset.

sum(is.na(jakartaDKI)) > 0
[1] FALSE

4.4 Calculate cumulative confirmed cases and death rates by month

There is a need to generate the total number of confirmed cases and deaths in each area using rowSums() and generate 2 new columns based on these values using mutate().

jakartaDKI <- jakartaDKI %>%
  mutate(`TOTAL POSITIF` = rowSums(.[6:22])) %>%
  mutate(`TOTAL MENINGGAL` = rowSums(.[23:39]))

The cumulative confirmed cases and death rates (per 10000 population) by month is then calculated.

jakartaDKI_rates <- jakartaDKI 

jakartaDKI_rates[6:22] <- 
  (jakartaDKI_rates[6:22] / jakartaDKI_rates$JUMLAH_PEN) * 10000

jakartaDKI_rates[23:39] <-
  (jakartaDKI_rates[23:39] / jakartaDKI_rates$JUMLAH_PEN) * 10000

4.5 Calculate Relative Risk

jakartaDKI_risk <- jakartaDKI %>%
  mutate(`RELATIVE_RISK` = (rowSums(.[23:39]) * 100) / 
           (rowSums(.[23:39]) * jakartaDKI$JUMLAH_PEN))